{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ncjQuwOgT4W3"
      },
      "source": [
        "# 🗣️ Speech-AI-Forge Colab\n",
        "\n",
        "👋 This script is built on [Speech-AI-Forge](https://github.com/lenML/Speech-AI-Forge). If this project helps you, feel free to support us with a star on GitHub! Contributions via PRs and issues are also welcome~\n",
        "\n",
        "## Usage Guide\n",
        "\n",
        "1. Select **Runtime** from the menu.\n",
        "2. Click **Run All**.\n",
        "\n",
        "Once the process is complete, look for the following information in the log:\n",
        "\n",
        "```\n",
        "Running on public URL: https://**.gradio.live\n",
        "```\n",
        "\n",
        "This link will be the public URL you can access.\n",
        "\n",
        "> Note: If prompted to restart during package installation, please select \"No.\""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "fvzfP6Tn8nOL"
      },
      "source": [
        "## Environment"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "09UPJy1gP045",
        "outputId": "079a9d2c-54aa-4ff5-a969-70e27029f3ed"
      },
      "outputs": [],
      "source": [
        "# 1. Clone the repository\n",
        "!git clone https://github.com/lenML/Speech-AI-Forge\n",
        "\n",
        "# 2. Change directory to the repository\n",
        "%cd Speech-AI-Forge\n",
        "\n",
        "# 3. Install ffmpeg and rubberband-cli\n",
        "!apt-get update -y\n",
        "!apt-get install -y ffmpeg rubberband-cli\n",
        "\n",
        "# 4. Install PyTorch\n",
        "!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121\n",
        "\n",
        "# 5. Install dependencies\n",
        "!pip install -r requirements.txt\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hLupWhOXT1kt"
      },
      "source": [
        "## Download Models"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gZBGws7c8nON",
        "outputId": "73de300c-da8e-4fd5-ea37-73cfecd2f2f7"
      },
      "outputs": [],
      "source": [
        "# @markdown ## Model download instructions\n",
        "# @markdown Most models are close to 2GB in size, please ensure that you have enough storage space and network bandwidth. <br/>\n",
        "# @markdown **Note**: At least one TTS model must be selected. If no model is selected, ChatTTS will be downloaded by default.\n",
        "\n",
        "# @markdown ### TTS Models\n",
        "# @markdown ChatTTS: [GitHub](https://github.com/2noise/ChatTTS) - A generative speech model for daily dialogue.\n",
        "download_chattts = True  # @param {\"type\":\"boolean\"}\n",
        "# @markdown F5-TTS: [GitHub](https://github.com/SWivid/F5-TTS) - A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching\n",
        "download_f5_tts = False  # @param {\"type\":\"boolean\"}\n",
        "# @markdown CosyVoice: [GitHub](https://github.com/FunAudioLLM/CosyVoice) - Multi-lingual large voice generation model, providing inference, training and deployment full-stack ability.\n",
        "download_cosyvoice = False  # @param {\"type\":\"boolean\"}\n",
        "# @markdown FireRedTTS: [GitHub](https://github.com/FireRedTeam/FireRedTTS) - An Open-Sourced LLM-empowered Foundation TTS System\n",
        "download_fire_red_tts = False  # @param {\"type\":\"boolean\"}\n",
        "# @markdown FishSpeech: [GitHub](https://github.com/fishaudio/fish-speech) - Brand new TTS solution\n",
        "download_fish_speech = False  # @param {\"type\":\"boolean\"}\n",
        "\n",
        "# @markdown ### ASR Models\n",
        "# @markdown Whisper: [GitHub](https://github.com/openai/whisper) - Robust Speech Recognition via Large-Scale Weak Supervision\n",
        "download_whisper = True  # @param {\"type\":\"boolean\"}\n",
        "\n",
        "# @markdown ### Clone Voice Models\n",
        "# @markdown OpenVoice: [GitHub](https://github.com/myshell-ai/OpenVoice) - Instant voice cloning by MIT and MyShell.\n",
        "download_open_voice = False  # @param {\"type\":\"boolean\"}\n",
        "\n",
        "# @markdown ### Enhance Models\n",
        "# @markdown resemble-enhance: [GitHub](https://github.com/resemble-ai/resemble-enhance) - AI powered speech denoising and enhancement\n",
        "download_enhancer = True  # @param {\"type\":\"boolean\"}\n",
        "\n",
        "def download_model(command):\n",
        "    print(f\"Executing: {command}\")\n",
        "    !{command}\n",
        "\n",
        "# 检查是否至少选择了一个 TTS 模型\n",
        "if not any([download_chattts, download_fish_speech, download_cosyvoice, download_fire_red_tts, download_f5_tts]):\n",
        "    print(\"未选择任何 TTS 模型，默认下载 ChatTTS...\")\n",
        "    download_chattts = True\n",
        "\n",
        "# TTS 模型下载\n",
        "if download_chattts:\n",
        "    download_model(\"python -m scripts.dl_chattts --source huggingface\")\n",
        "\n",
        "if download_fish_speech:\n",
        "    download_model(\"python -m scripts.downloader.fish_speech_1_2sft --source huggingface\")\n",
        "\n",
        "if download_cosyvoice:\n",
        "    download_model(\"python -m scripts.dl_cosyvoice_instruct --source huggingface\")\n",
        "\n",
        "if download_fire_red_tts:\n",
        "    download_model(\"python -m scripts.downloader.fire_red_tts --source huggingface\")\n",
        "\n",
        "if download_f5_tts:\n",
        "    download_model(\"python -m scripts.downloader.f5_tts --source huggingface\")\n",
        "    download_model(\"python -m scripts.downloader.vocos_mel_24khz --source huggingface\")\n",
        "\n",
        "# ASR 模型下载\n",
        "if download_whisper:\n",
        "    download_model(\"python -m scripts.downloader.faster_whisper --source huggingface\")\n",
        "\n",
        "# Clone Voice 模型下载\n",
        "if download_open_voice:\n",
        "    download_model(\"python -m scripts.downloader.open_voice --source huggingface\")\n",
        "\n",
        "# 增强模型下载\n",
        "if download_enhancer:\n",
        "    download_model(\"python -m scripts.dl_enhance --source huggingface\")\n",
        "\n",
        "print(\"All selected models have been downloaded!\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RtpfdFem8nOO"
      },
      "source": [
        "## Run WebUI"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "hi6eW_KLDc_p",
        "outputId": "5de77d0c-a3ee-4b86-87a8-490fdcdda794"
      },
      "outputs": [],
      "source": [
        "!nvcc --version"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "J5CkIs0nFZKi",
        "outputId": "b21554f2-e82b-422e-dc82-dfc675bc6c49"
      },
      "outputs": [],
      "source": [
        "!nvidia-smi"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "X8x_dWk88nOO",
        "outputId": "9029743a-d214-4385-a241-ef5de1f2c110"
      },
      "outputs": [],
      "source": [
        "!python webui.py --share --language=en"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
